import pandas as pd
import requests
import random
import time
import os

from const import pdf_name

# Load the first sheet of the workbook; the rest of the script reads a
# 'Company' column and one column per year ('2010', '2011', ...) from it.
rawdata = pd.read_excel(
    'company_annual_reports.xlsx',
    sheet_name=0,
)

def get_data(iloc):
    """Return the company name and per-year report URLs for one row.

    Args:
        iloc: Row label passed to ``rawdata.at`` (the main loop passes
            integers from ``range(rawdata.shape[0])``).

    Returns:
        A ``(name, years)`` tuple. ``name`` is the value in the 'Company'
        column. ``years`` is a list with one dict per year from 2010 to 2022
        inclusive, each shaped ``{'name': '<year>', 'url': <cell or None>}``;
        ``url`` is ``None`` when the cell is missing according to
        ``pd.isna``.
    """
    company = rawdata.at[iloc, 'Company']

    def _url_for(column):
        # Empty spreadsheet cells are normalised to None so that callers
        # can use a plain truthiness test.
        cell = rawdata.at[iloc, column]
        return None if pd.isna(cell) else cell

    reports = [
        {'name': column, 'url': _url_for(column)}
        for column in map(str, range(2010, 2023))
    ]
    return company, reports

def get_filepath(name, year):
    """Return the full path under which a report PDF should be saved.

    The target directory is ``pdf_name`` (imported from ``const``) joined
    onto the current working directory. It is created if it does not exist.

    Args:
        name: Company name, used as the first part of the file name.
        year: Report year, used as the second part of the file name.

    Returns:
        The path ``<cwd>/<pdf_name>/<name><year>年年度报告.pdf`` as a string.

    Raises:
        OSError: If the directory cannot be created, e.g. because a regular
            file already occupies that path.
    """
    file_path = os.path.join(os.getcwd(), pdf_name)
    # Create the local PDF directory if it does not exist yet. makedirs with
    # exist_ok=True avoids the check-then-create race of exists() + mkdir()
    # and also creates any missing parent directories.
    os.makedirs(file_path, exist_ok=True)

    file_name = "{}{}年年度报告.pdf".format(name, year)
    return os.path.join(file_path, file_name)

def download_pdf(url, file_full_name, timeout=30):
    """Download ``url`` and write the response body to ``file_full_name``.

    Args:
        url: Address of the file to fetch.
        file_full_name: Destination path; opened in binary write mode, so an
            existing file is overwritten.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the script forever.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP 4xx/5xx status (via ``raise_for_status``).
        OSError: If the destination file cannot be opened or written.
    """
    headers = {'User-Agent':"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60"}
    # stream=True lets iter_content pull the body in chunks instead of
    # buffering the whole file in memory; the with-block closes the
    # connection when done.
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as res:
        # Fail before touching the disk so an HTTP error page is never
        # saved under a .pdf name.
        res.raise_for_status()
        with open(file_full_name, "wb") as fp:
            for chunk in res.iter_content(chunk_size=1024):
                if chunk:
                    fp.write(chunk)
                
# Walk every spreadsheet row and fetch each yearly report that has a URL.
for iloc in range(rawdata.shape[0]):
    name, years = get_data(iloc)
    for year in years:
        print("开始下载{}的{}年报".format(name, year['name']))
        if not year['url']:
            print("没有{}的{}年报".format(name, year['name']))
            continue

        # Resolve the target path only when there is something to download.
        file_full_name = get_filepath(name, year['name'])
        try:
            download_pdf(year['url'], file_full_name)
        except (requests.RequestException, OSError) as exc:
            # A single bad URL or write failure must not abort the batch;
            # report it and move on to the next report.
            print("{}的{}年报下载失败: {}".format(name, year['name'], exc))
        else:
            print("===========下载完成==========")
        # Pause 3-4 seconds between requests to avoid hammering the server.
        time.sleep(random.uniform(3, 4))

